import pandas as pd
# Load the city table; the file uses ';' as its field separator.
df = pd.read_csv('city.csv', sep=';')
df  # bare expression: only rendered in an interactive/notebook session

# Per-country totals. numeric_only=True restricts the aggregation to numeric
# columns: since pandas 2.0 the default (False) also tries to "sum" object
# columns (presumably text fields such as Name), which concatenates strings
# or raises instead of producing a meaningful total.
df.groupby('CountryCode').sum(numeric_only=True)

# Largest Population value per country; as_index=False keeps CountryCode as
# a regular column next to Population.
df.groupby('CountryCode', as_index=False)[['Population']].max()

# Two ways to project the frame down to the ID and Name columns.
df.filter(items=['ID', 'Name'])
df[['ID', 'Name']]
# df.groupby('CountryCode', as_index=False).count().sort_values(by='Population', ascending=False).head(8)['Population'].sum()
# Count the non-null entries of every column per country, rank the countries
# by their Population count (largest first) and keep the first ten rows.
(
    df.groupby('CountryCode', as_index=False)
    .count()
    .sort_values('Population', ascending=False)
    .head(10)
)
# Keep only the countries that have more than 100 rows, then collapse each
# remaining country to its column-wise maxima. The pipeline is evaluated once
# and bound to df2 instead of being computed a second time just for display.
# NOTE: max() is taken per column independently, so the non-Population values
# in a result row do not necessarily come from the same original row.
df2 = (
    df.groupby('CountryCode', as_index=False)
    .filter(lambda grp: len(grp) > 100)
    .groupby('CountryCode', as_index=False)
    .max()
)
df2

# Of those countries, the ones whose largest Population exceeds five million.
df2[df2.Population > 5000000][['CountryCode', 'Population']]
# Same question answered in a single pass: for each country take the largest
# Population if the country has more than 100 rows, otherwise 0, and keep the
# countries whose value exceeds five million (so the 0 placeholders drop out).
#
# Population is selected *before* apply() so the callback never touches the
# grouping column; DataFrameGroupBy.apply operating on grouping columns is
# deprecated since pandas 2.2. to_frame('Population') names the column
# directly, replacing the former positional 0 column plus rename step.
(
    df.groupby('CountryCode')['Population']
    .apply(lambda pop: pop.max() if len(pop) > 100 else 0)
    .to_frame('Population')
    .sort_values(by='Population', ascending=False)
    .query('Population > 5000000')
    .reset_index()
)
# Show the full table again (rendered only in an interactive session).
df

# Rows whose Population exceeds eight million, selected two ways:
# first with a boolean mask, then with the equivalent query string.
df.loc[df['Population'] > 8000000]
df.query('Population > 8000000')